## The functions in the rglwidget package have been moved to rgl.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## Loading required package: viridisLite
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

FEP Project Data Analysis

Data Loading and Formating

I have converted the Excel file to a CSV file so R can be able to read the data. Blank cells appear as N/A values here

#Load data
fepdata1 = read.csv("FEP_project_reloaded_28.4.2021.csv", stringsAsFactors = F)
fepdata = fepdata1[complete.cases(fepdata1[,c(6)]) | complete.cases(fepdata1[,c(7)]) | complete.cases(fepdata1[,c(8)]),]

#Make phenotype 0 and 1. 0 for Healthy, 1 for Cases.
fepdata$Phenotype = as.factor(ifelse(fepdata$Phenotype=="D",1,0))
fepdata$pheno_lm = as.factor(ifelse(fepdata$Phenotype==0,"Control","Case"))
#Add phenotype colours
selcols = c("#648FFF","#DC267F")
fepdata$colour = selcols[fepdata$Phenotype]

#Multivariate Analysis

Investigate differences between Controls and Cases

Distributions of the phosphorylation values

par(mfrow=c(3,3))
hist(fepdata$pAkt.value, breaks=20, xlab="pAkt", main="pAkt")
hist(fepdata$pGSK3.value, breaks=20, xlab="pGSK3", main="pGSK3")
hist(fepdata$pS6.value, breaks=20, xlab="pS6", main="pS6")

plot(density(fepdata$pAkt.value, na.rm = T), main="")
plot(density(fepdata$pGSK3.value, na.rm = T), main="")
plot(density(fepdata$pS6.value, na.rm = T), main="")

qqnorm(fepdata$pAkt.value, pch = 1, frame = FALSE)
qqline(fepdata$pAkt.value, col = "steelblue", lwd = 2)

qqnorm(fepdata$pGSK3.value, pch = 1, frame = FALSE)
qqline(fepdata$pGSK3.value, col = "steelblue", lwd = 2)

qqnorm(fepdata$pS6.value, pch = 1, frame = FALSE)
qqline(fepdata$pS6.value, col = "steelblue", lwd = 2)

shapiro.test(fepdata$pAkt.value)
## 
##  Shapiro-Wilk normality test
## 
## data:  fepdata$pAkt.value
## W = 0.95549, p-value = 0.01239
shapiro.test(fepdata$pGSK3.value)
## 
##  Shapiro-Wilk normality test
## 
## data:  fepdata$pGSK3.value
## W = 0.93591, p-value = 0.001555
shapiro.test(fepdata$pS6.value)
## 
##  Shapiro-Wilk normality test
## 
## data:  fepdata$pS6.value
## W = 0.95438, p-value = 0.01076

All variables look left-skewed. The normality test shows that all values do not follow a normal distribution (p shapiro < 0.05).

Center values and perform Z-score transformation

fepdata$pAkt.value_z = as.vector(scale(fepdata$pAkt.value,center = T, scale=T))
fepdata$pS6.value_z = as.vector(scale(fepdata$pS6.value,center = T, scale=T))
fepdata$pGSK3.value_z = as.vector(scale(fepdata$pGSK3.value,center = T, scale=T))

boxplot(main="Before Z-score transformation", fepdata[,c(6:8)], las=1, col="steelblue", outline=F)
stripchart(fepdata[,c(6:8)], vertical = TRUE, method = "jitter", add = TRUE, pch = 20, col = 'blue')

boxplot(main="After Z-score transformation", fepdata[,c(12:14)], las=1, col="steelblue", outline=F)
stripchart(fepdata[,c(12:14)], vertical = TRUE, method = "jitter", add = TRUE, pch = 20, col = 'blue')

Cases versus Controls

Let’s perform tests to see how different are the phosphorylation values for each kinase between cases and controls.

We are performing Mann–Whitney U test (non-parametric) as we cannot assume normality for any of the variables.

pAkt

pakt_test = wilcox.test(fepdata$pAkt.value[fepdata$Phenotype==0],fepdata$pAkt.value[fepdata$Phenotype==1])

fep_wide = fepdata[,c(1,9,6:8)] %>% pivot_longer(c(3:5), names_to = 'kinase', values_to = "phospho") %>% data.frame()
p <- ggplot(fep_wide, aes(x=kinase, y=phospho, fill=Phenotype)) + geom_boxplot(aes(fill=Phenotype),outlier.shape = NA)
p <- p + geom_point(position=position_jitterdodge(),alpha=0.3)
p <- p + facet_wrap( ~ kinase, scales="free")
p <- p + xlab("Kinase") + ylab("Phosphorylation")
p <- p + stat_compare_means(aes(group = Phenotype), label.x = 1, label.y = -0.1, size=3)
p <- p + guides(fill=guide_legend(title="Phenotype")) 
p 

pakt_test = wilcox.test(fepdata$pAkt.value[fepdata$Phenotype==0],fepdata$pAkt.value[fepdata$Phenotype==1])
pgsk3_test = wilcox.test(fepdata$pGSK3.value[fepdata$Phenotype==0],fepdata$pGSK3.value[fepdata$Phenotype==1])
ps6_test = wilcox.test(fepdata$pS6.value[fepdata$Phenotype==0],fepdata$pS6.value[fepdata$Phenotype==1])

pAkt Cases vs Controls

print(pakt_test)
## 
##  Wilcoxon rank sum exact test
## 
## data:  fepdata$pAkt.value[fepdata$Phenotype == 0] and fepdata$pAkt.value[fepdata$Phenotype == 1]
## W = 790, p-value = 0.1114
## alternative hypothesis: true location shift is not equal to 0

pGSK3 Cases vs Controls

print(pgsk3_test)
## 
##  Wilcoxon rank sum exact test
## 
## data:  fepdata$pGSK3.value[fepdata$Phenotype == 0] and fepdata$pGSK3.value[fepdata$Phenotype == 1]
## W = 365, p-value = 0.005368
## alternative hypothesis: true location shift is not equal to 0

pS6 Cases vs Control

print(ps6_test)
## 
##  Wilcoxon rank sum exact test
## 
## data:  fepdata$pS6.value[fepdata$Phenotype == 0] and fepdata$pS6.value[fepdata$Phenotype == 1]
## W = 969, p-value = 0.0002204
## alternative hypothesis: true location shift is not equal to 0

It appears the pS6 shows the greatest difference between cases and controls , compared to the other kinases. pGSK3 also shows a great statistically significant difference, while pAtk show a mild but significant difference.

Cases versus Controls using Zscore transformed values

Let’s perform tests to see how different are the phosphorylation values for each kinase between cases and controls using Z-score transformed value.

We are performing Mann–Whitney U test (non-parametric) as we cannot assume normality for any of the variables.

pAkt

pakt_test = wilcox.test(fepdata$pAkt.value_z[fepdata$Phenotype==0],fepdata$pAkt.value_z[fepdata$Phenotype==1])

fep_wide = fepdata[,c(1,9,12:14)] %>% pivot_longer(c(3:5), names_to = 'kinase', values_to = "phospho")
p <- ggplot(fep_wide, aes(x=kinase, y=phospho, fill=Phenotype)) + geom_boxplot(aes(fill=Phenotype),outlier.shape = NA)
p <- p + geom_point(position=position_jitterdodge(),alpha=0.3)
p <- p + facet_wrap( ~ kinase, scales="free")
p <- p + xlab("Kinase") + ylab("Phosphorylation")
p <- p + stat_compare_means(aes(group = Phenotype), label.x = 1, label.y = -2, size=3)
p <- p + guides(fill=guide_legend(title="Phenotype")) 
p 

pakt_test = wilcox.test(fepdata$pAkt.value_z[fepdata$Phenotype==0],fepdata$pAkt.value_z[fepdata$Phenotype==1])
pgsk3_test = wilcox.test(fepdata$pGSK3.value_z[fepdata$Phenotype==0],fepdata$pGSK3.value_z[fepdata$Phenotype==1])
ps6_test = wilcox.test(fepdata$pS6.value_z                [fepdata$Phenotype==0],fepdata$pS6.value_z[fepdata$Phenotype==1])

pAkt Cases vs Controls

print(pakt_test)
## 
##  Wilcoxon rank sum exact test
## 
## data:  fepdata$pAkt.value_z[fepdata$Phenotype == 0] and fepdata$pAkt.value_z[fepdata$Phenotype == 1]
## W = 790, p-value = 0.1114
## alternative hypothesis: true location shift is not equal to 0

pGSK3 Cases vs Controls

print(pgsk3_test)
## 
##  Wilcoxon rank sum exact test
## 
## data:  fepdata$pGSK3.value_z[fepdata$Phenotype == 0] and fepdata$pGSK3.value_z[fepdata$Phenotype == 1]
## W = 365, p-value = 0.005368
## alternative hypothesis: true location shift is not equal to 0

pS6 Cases vs Control

print(ps6_test)
## 
##  Wilcoxon rank sum exact test
## 
## data:  fepdata$pS6.value_z[fepdata$Phenotype == 0] and fepdata$pS6.value_z[fepdata$Phenotype == 1]
## W = 969, p-value = 0.0002204
## alternative hypothesis: true location shift is not equal to 0

It appears the pS6 shows the greatest difference between cases and controls , compared to the other kinases. pGSK3 also shows a great statistically significant difference, while pAkt doesn’t seem to change.

Model the Phenotype (Regression model)

Because the Phenotype is a catergorical and binary variable we’ll try to fit a binomial/logistic regression.

We’ll fit two models: 1) Phenotype ~ pS6 + pGSK3 + pAkt. 2) Phenotype ~ pS6 + pGSK3 + pAkt + all interactions between them.

Fit the models:

model_1 = glm(fepdata$pheno_lm ~ fepdata$pS6.value_z + fepdata$pGSK3.value_z + fepdata$pAkt.value_z, family="binomial")
model_2 = glm(fepdata$pheno_lm ~ fepdata$pS6.value_z * fepdata$pGSK3.value_z * fepdata$pAkt.value_z, family="binomial")
probabilities <- predict(model_1, type = "response")
all_models = list(model_1,model_2)

We need to check whether our data follows the logistic regression assumptions, to conclude if our model fitting is valid.

Check if our data fullfil the logistic regression assumptions:

The logistic regression method assumes that:

  1. The outcome is a binary or dichotomous variable like yes vs no, positive vs negative, 1 vs 0.

That’s true. THe phenotype is a binary variable; Cases vs Controls.

  1. There is a linear relationship between the logit of the outcome and each predictor variables. Recall that the logit function is logit(p) = log(p/(1-p)), where p is the probabilities of the outcome.
# Select only numeric predictors
mydata <- fepdata[12:14] %>%
  dplyr::select_if(is.numeric) 
predictors <- colnames(mydata)
# Bind the logit and tidying the data for plot
mydata <- mydata %>% na.omit() %>%
  mutate(logit = log(probabilities/(1-probabilities))) %>%
  gather(key = "predictors", value = "predictor.value", -logit)

ggplot(mydata, aes(logit, predictor.value))+
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess") + 
  theme_bw() + 
  facet_wrap(~predictors, scales = "free_y")

The smoothed scatter plots show that variables pAkt, pGSK3 and PS6 are all quite linearly associated with the Phenotype in logit scale. So this assumption is being fullfilled!

  1. There are no influential values (extreme values or outliers) in the continuous predictors for all models
nameit <- function(v1) {
  deparse(substitute(v1))
}

check_outliers <- function(model){
  mdata <- augment(model) %>% 
  mutate(index = 1:n())
  cat("Examined model standardised residuals: ")
  cat(paste0(model$call,"\n"))
  print(summary(mdata$`.std.resid`))
  cat("\n\n")
}

lapply(all_models, function(x) check_outliers(x))
## Examined model standardised residuals: glm
##  fepdata$pheno_lm ~ fepdata$pS6.value_z + fepdata$pGSK3.value_z + fepdata$pAkt.value_z
##  binomial
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -1.888853 -0.730267 -0.133423  0.008236  0.739811  2.265495 
## 
## 
## Examined model standardised residuals: glm
##  fepdata$pheno_lm ~ fepdata$pS6.value_z * fepdata$pGSK3.value_z * fepdata$pAkt.value_z
##  binomial
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -2.19971 -0.75261 -0.04279 -0.00820  0.63948  2.32111
## [[1]]
## NULL
## 
## [[2]]
## NULL

There are no standardised residuals > 3 so our data doesn’t have influential data points!

  1. There is no high intercorrelations (i.e. multicollinearity) among the predictors for all models.
call_vif <- function(model){
  cat("Calling VIF for model:")
  cat(paste0(model$call,"\n"))
  print(vif(model))
  cat("\n\n")
}

lapply(all_models, function(x) call_vif(x))
## Calling VIF for model:glm
##  fepdata$pheno_lm ~ fepdata$pS6.value_z + fepdata$pGSK3.value_z + fepdata$pAkt.value_z
##  binomial
##   fepdata$pS6.value_z fepdata$pGSK3.value_z  fepdata$pAkt.value_z 
##              1.174901              1.365275              1.176885 
## 
## 
## Calling VIF for model:glm
##  fepdata$pheno_lm ~ fepdata$pS6.value_z * fepdata$pGSK3.value_z * fepdata$pAkt.value_z
##  binomial
##                                            fepdata$pS6.value_z 
##                                                       1.472502 
##                                          fepdata$pGSK3.value_z 
##                                                       1.769428 
##                                           fepdata$pAkt.value_z 
##                                                       1.698074 
##                      fepdata$pS6.value_z:fepdata$pGSK3.value_z 
##                                                       1.936608 
##                       fepdata$pS6.value_z:fepdata$pAkt.value_z 
##                                                       2.114839 
##                     fepdata$pGSK3.value_z:fepdata$pAkt.value_z 
##                                                       1.743860 
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z 
##                                                       1.888453
## [[1]]
## NULL
## 
## [[2]]
## NULL

As a rule of thumb, a VIF value that exceeds 5 or 10 indicates a problematic amount of collinearity. In our example, there is no collinearity: all variables have a value of VIF well below 5.

So now we made sure all our models follows all the assumptions of the logistic regression!

Model Coefficients

get_model_sum <- function(model){
  cat("----------------------------------------------------------")
  print(summary(model))
  cat("\n")
  print(anova(model, test="Chisq"))
  cat("----------------------------------------------------------")
  cat("\n\n\n\n")
}
lapply(all_models, function(x) get_model_sum(x))
## ----------------------------------------------------------
## Call:
## glm(formula = fepdata$pheno_lm ~ fepdata$pS6.value_z + fepdata$pGSK3.value_z + 
##     fepdata$pAkt.value_z, family = "binomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7652  -0.6984  -0.1321   0.7114   2.2227  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            -0.1938     0.3214  -0.603 0.546414    
## fepdata$pS6.value_z     1.2154     0.3744   3.247 0.001168 ** 
## fepdata$pGSK3.value_z  -1.5442     0.4593  -3.362 0.000774 ***
## fepdata$pAkt.value_z    0.7302     0.3522   2.074 0.038122 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 95.640  on 68  degrees of freedom
## Residual deviance: 62.456  on 65  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 70.456
## 
## Number of Fisher Scoring iterations: 5
## 
## 
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: fepdata$pheno_lm
## 
## Terms added sequentially (first to last)
## 
## 
##                       Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                                     68     95.640              
## fepdata$pS6.value_z    1  12.4445        67     83.195 0.0004192 ***
## fepdata$pGSK3.value_z  1  15.6045        66     67.591 7.807e-05 ***
## fepdata$pAkt.value_z   1   5.1345        65     62.456 0.0234552 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ----------------------------------------------------------
## 
## 
## 
## ----------------------------------------------------------
## Call:
## glm(formula = fepdata$pheno_lm ~ fepdata$pS6.value_z * fepdata$pGSK3.value_z * 
##     fepdata$pAkt.value_z, family = "binomial")
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.13245  -0.64440  -0.04255   0.60260   2.23839  
## 
## Coefficients:
##                                                                Estimate
## (Intercept)                                                    -0.02023
## fepdata$pS6.value_z                                             1.54788
## fepdata$pGSK3.value_z                                          -1.81311
## fepdata$pAkt.value_z                                            1.19501
## fepdata$pS6.value_z:fepdata$pGSK3.value_z                      -0.86815
## fepdata$pS6.value_z:fepdata$pAkt.value_z                        0.74249
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z                      0.10387
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z -0.32824
##                                                                Std. Error
## (Intercept)                                                       0.39790
## fepdata$pS6.value_z                                               0.48242
## fepdata$pGSK3.value_z                                             0.57922
## fepdata$pAkt.value_z                                              0.46456
## fepdata$pS6.value_z:fepdata$pGSK3.value_z                         0.61977
## fepdata$pS6.value_z:fepdata$pAkt.value_z                          0.58163
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z                        0.51326
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z    0.50106
##                                                                z value Pr(>|z|)
## (Intercept)                                                     -0.051  0.95946
## fepdata$pS6.value_z                                              3.209  0.00133
## fepdata$pGSK3.value_z                                           -3.130  0.00175
## fepdata$pAkt.value_z                                             2.572  0.01010
## fepdata$pS6.value_z:fepdata$pGSK3.value_z                       -1.401  0.16129
## fepdata$pS6.value_z:fepdata$pAkt.value_z                         1.277  0.20175
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z                       0.202  0.83963
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z  -0.655  0.51240
##                                                                  
## (Intercept)                                                      
## fepdata$pS6.value_z                                            **
## fepdata$pGSK3.value_z                                          **
## fepdata$pAkt.value_z                                           * 
## fepdata$pS6.value_z:fepdata$pGSK3.value_z                        
## fepdata$pS6.value_z:fepdata$pAkt.value_z                         
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z                       
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 95.640  on 68  degrees of freedom
## Residual deviance: 56.194  on 61  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 72.194
## 
## Number of Fisher Scoring iterations: 6
## 
## 
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: fepdata$pheno_lm
## 
## Terms added sequentially (first to last)
## 
## 
##                                                                Df Deviance
## NULL                                                                      
## fepdata$pS6.value_z                                             1  12.4445
## fepdata$pGSK3.value_z                                           1  15.6045
## fepdata$pAkt.value_z                                            1   5.1345
## fepdata$pS6.value_z:fepdata$pGSK3.value_z                       1   2.3553
## fepdata$pS6.value_z:fepdata$pAkt.value_z                        1   3.1912
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z                      1   0.2161
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z  1   0.5002
##                                                                Resid. Df
## NULL                                                                  68
## fepdata$pS6.value_z                                                   67
## fepdata$pGSK3.value_z                                                 66
## fepdata$pAkt.value_z                                                  65
## fepdata$pS6.value_z:fepdata$pGSK3.value_z                             64
## fepdata$pS6.value_z:fepdata$pAkt.value_z                              63
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z                            62
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z        61
##                                                                Resid. Dev
## NULL                                                               95.640
## fepdata$pS6.value_z                                                83.195
## fepdata$pGSK3.value_z                                              67.591
## fepdata$pAkt.value_z                                               62.456
## fepdata$pS6.value_z:fepdata$pGSK3.value_z                          60.101
## fepdata$pS6.value_z:fepdata$pAkt.value_z                           56.910
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z                         56.694
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z     56.194
##                                                                 Pr(>Chi)    
## NULL                                                                        
## fepdata$pS6.value_z                                            0.0004192 ***
## fepdata$pGSK3.value_z                                          7.807e-05 ***
## fepdata$pAkt.value_z                                           0.0234552 *  
## fepdata$pS6.value_z:fepdata$pGSK3.value_z                      0.1248544    
## fepdata$pS6.value_z:fepdata$pAkt.value_z                       0.0740347 .  
## fepdata$pGSK3.value_z:fepdata$pAkt.value_z                     0.6420501    
## fepdata$pS6.value_z:fepdata$pGSK3.value_z:fepdata$pAkt.value_z 0.4793909    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## ----------------------------------------------------------
## [[1]]
## NULL
## 
## [[2]]
## NULL

Model output explanation:

Coefficients:

  • Under Estimate in the second row is the coefficient associated with the variable listed to the left. It is the estimated amount by which the log odds of Phenotype would increase if each variable (i.e. pS6) was one unit higher.
    • The log odds of Phenotype when all the variables are 0 is just above in the first row (Intercept).
  • In the next column, we see the standard error associated with these estimates; That is, they are an estimate of how much, on average, these estimates would bounce around if the study were re-run identically, but with new data, over and over.
  • If we were to divide the estimate by the standard error, we would get a quotient which is assumed to be normally distributed with large enough samples. This value is listed in under z value.
  • Below Pr(>|z|) are listed the two-tailed p-values that correspond to those z-values in a standard normal distribution. Lastly, there are the traditional significance stars

Goodness of fit: The Residual deviance is a measure of the lack of fit of your model taken as a whole, whereas the Null deviance is such a measure for a reduced model that only includes the intercept. Notice that the degrees of freedom associated with these two differs by only the number of variables we included in the model. model_1 has only two covariates (two additional degrees of freedom), while model_2 has four (four additional degrees of freedom).

The AIC is another measure of goodness of fit that takes into account the ability of the model to fit the data. This is very useful when comparing two models.

Summary

  • By comparing the AIC values it seeems that model_1 (Including pGSK3,pS6 and pAkt) is the best to explain the phenotype changes.
  • It doesn’t look like that pAkt-pGSK3 interaction or any other interaction between the kinases influence the phenotype (model_2)

Scatterplot matrices with Pearson correlation

All samples

This is a scatterplot matrix between pAkt, pS6, pGSK3 and phenotype values.

panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
 usr <- par("usr"); on.exit(par(usr))
 par(usr = c(0, 1, 0, 1))
 cortest <- cor.test(x, y, use="complete.obs", method = "spearman", exact = F)
 r = abs(cortest$estimate)
 p = cortest$p.value
 txt1 <- format(c(r, 0.123456789), digits = digits)[1]
 txt2 <- format(c(p, 0.123456789), digits = digits)[1]
 txt <- paste0(prefix, "r=",txt1, ", ", "p=", txt2)
 text(0.5, 0.5, txt, cex = 0.8, font = 4)
}

reg <- function(x, y, col) abline(lm(y~x), col=col) 

panel.lm =  function (x, y, col = par("col"), bg = NA, pch = par("pch"), 
    cex = 1, col.smooth = "red", span = 2/3, iter = 3, ...)  {
    points(x, y, pch = pch, col = col, bg = bg, cex = 0.7)
    ok <- is.finite(x) & is.finite(y)
    if (any(ok)) reg(x[ok], y[ok], col.smooth)
}

#png("FEPvsControls_v5_pairs_all.png", units="in", pointsize = 15, width=7, height=5, res=1800)
pairs(fepdata[c(12:14,10)], panel = panel.lm,
      cex = 50, pch = 19, col = fepdata$colour, cex.labels = 1,
      font.labels = 2, lower.panel = panel.cor)

#dev.off()
scatterplotMatrix(x=fepdata[c(6:8,10)], groups = fepdata$Phenotype, col = selcols, legend = F)

In this matrix scatterplot, the diagonal cells show histograms of each of the variables, in this case the values of pAkt, pS6, pGSK3 and the phenotype (0 for controls, 1 for cases) values.

Each of the off-diagonal cells in the upper part of the table is a scatterplot of two of the 4 variables with a regression line fitted, for example, A4 is a scatterplot of pAkt(y) against Phenotype (0 OR 1).

Each of the off-diagonal cells in the lower part of the table contains the Spearman Correlation value of two of the 4 variables. for example, C1 shows the Pearson correlation between PAkt and PGSK3.

Controls Only

fepdata_temp = (subset(fepdata,Phenotype==0))

panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
 par(usr = c(0, 1, 0, 1))
 cortest <- cor.test(x, y, use="complete.obs", method = "spearman", exact = F)
 r = abs(cortest$estimate)
 p = cortest$p.value
 txt1 <- format(c(r, 0.123456789), digits = digits)[1]
 txt2 <- format(c(p, 0.123456789), digits = digits)[1]
 txt <- paste0(prefix, "r=",txt1, ", ", "p=", txt2)
 text(0.5, 0.5, txt, cex = 1, font = 4)
}

reg <- function(x, y, col) abline(lm(y~x), col=col) 

panel.lm =  function (x, y, col = par("col"), bg = NA, pch = par("pch"), 
    cex = 1, col.smooth = "red", span = 2/3, iter = 3, ...)  {
    points(x, y, pch = pch, col = col, bg = bg, cex = cex)
    ok <- is.finite(x) & is.finite(y)
    if (any(ok)) reg(x[ok], y[ok], col.smooth)
}

pairs(fepdata_temp[c(6:8)], panel = panel.lm,
      cex = 1.5, pch = 19, col = fepdata_temp$colour, cex.labels = 2,
      font.labels = 2, lower.panel = panel.cor)

scatterplotMatrix(x=fepdata_temp[c(12:14)], col = selcols[1])

Same description as above.

Cases only

fepdata_temp = (subset(fepdata,Phenotype==1))

panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
 usr <- par("usr"); on.exit(par(usr))
 par(usr = c(0, 1, 0, 1))
 cortest <- cor.test(x, y, use="complete.obs", method = "spearman", exact = F)
 r = abs(cortest$estimate)
 p = cortest$p.value
 txt1 <- format(c(r, 0.123456789), digits = digits)[1]
 txt2 <- format(c(p, 0.123456789), digits = digits)[1]
 txt <- paste0(prefix, "r=",txt1, ", ", "p=", txt2)
 text(0.5, 0.5, txt, cex = 1, font = 4)
}

reg <- function(x, y, col) abline(lm(y~x), col=col) 

panel.lm =  function (x, y, col = par("col"), bg = NA, pch = par("pch"), 
    cex = 1, col.smooth = "red", span = 2/3, iter = 3, ...)  {
    points(x, y, pch = pch, col = col, bg = bg, cex = cex)
    ok <- is.finite(x) & is.finite(y)
    if (any(ok)) reg(x[ok], y[ok], col.smooth)
}

pairs(fepdata_temp[c(12:14)], panel = panel.lm,
      cex = 1.5, pch = 19, col = fepdata_temp$colour, cex.labels = 2,
      font.labels = 2, lower.panel = panel.cor)

scatterplotMatrix(x=fepdata_temp[c(12:14)], col = selcols[2])

Same description as above.

Adding all variables

fepdata_temp = (subset(fepdata,Phenotype==1))

panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
 usr <- par("usr"); on.exit(par(usr))
 par(usr = c(0, 1, 0, 1))
 cortest <- cor.test(x, y, use="complete.obs", method = "spearman", exact = F)
 r = abs(cortest$estimate)
 p = cortest$p.value
 txt1 <- format(c(r, 0.123456789), digits = digits)[1]
 txt2 <- format(c(p, 0.123456789), digits = digits)[1]
 txt <- paste0(prefix, "r=",txt1, ", ", "p=", txt2)
 text(0.5, 0.5, txt, cex = 1.2, font = 4)
}

reg <- function(x, y, col) abline(lm(y~x), col=col) 

panel.lm =  function (x, y, col = par("col"), bg = NA, pch = par("pch"), 
    cex = 1, col.smooth = "red", span = 2/3, iter = 3, ...)  {
    points(x, y, pch = pch, col = col, bg = bg, cex = cex)
    ok <- is.finite(x) & is.finite(y)
    if (any(ok)) reg(x[ok], y[ok], col.smooth)
}

#png("FEP_v5_pairs_all.png", units="in", width=10, height=8, res=1800)
pairs(fepdata_temp[c(2:5,12:14)], panel = panel.lm,
      cex = 1.2, pch = 19, col = fepdata_temp$colour, cex.labels = 1.5,
      font.labels = 1, lower.panel = panel.cor)

#dev.off()

3D Plots

All samples

Cases vs Controls in the same graph

Same graph - Red Cases - Blue Controls - GSK3 same color range

fepdata_cont = (subset(fepdata,Phenotype==0))
fepdata_case = (subset(fepdata,Phenotype==1))
gsk_max = max(c(fepdata_cont$pGSK3.value_z,fepdata_case$pGSK3.value_z), na.rm=TRUE)
gsk_min = min(c(fepdata_cont$pGSK3.value_z,fepdata_case$pGSK3.value_z), na.rm=TRUE)
akt_max = max(c(fepdata_cont$pAkt.value_z,fepdata_case$pAkt.value_z), na.rm=TRUE)
akt_min = min(c(fepdata_cont$pAkt.value_z,fepdata_case$pAkt.value_z),na.rm=TRUE)
ps6_max = max(c(fepdata_cont$pS6.value_z,fepdata_case$pS6.value_z), na.rm=TRUE)
ps6_min = min(c(fepdata_cont$pS6.value_z,fepdata_case$pS6.value_z), na.rm=TRUE)
pp <- ggplot() +
      geom_point(data=fepdata_cont, aes(x=pAkt.value_z, y=pS6.value_z, color=pGSK3.value_z),shape=19,size=3) +
      scale_color_gradient(low="white", high="#0000cc",limits=c(gsk_min-0.5,gsk_max)) +#
      new_scale_colour() +
      geom_point(data=fepdata_case, aes(x=pAkt.value_z, y=pS6.value_z, color=pGSK3.value_z),shape=19,size=3) +
      scale_color_gradient(low="white", high="#cc0000",limits=c(gsk_min-0.5,gsk_max)) +
      xlim(akt_min-0.1,akt_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
      theme_bw()
pp

pp3 <- ggplot() +
      geom_point(data=fepdata, aes(x=pAkt.value_z, y=pS6.value_z, size=pGSK3.value_z, color=pheno_lm),shape=20) +
      scale_color_manual(values = c("#DC267F","#648FFF")) + 
      xlim(akt_min-0.1,akt_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) + 
      theme_bw()
pp3

pp2 <- ggplot() +
      geom_point(data=fepdata_cont, aes(x=pGSK3.value_z, y=pS6.value_z, color=pAkt.value_z),shape=19,size=5) +
      scale_color_gradient(low="white", high="#1a5bff",limits=c(akt_min-0.5,akt_max)) +#
      new_scale_colour() +
      geom_point(data=fepdata_case, aes(x=pGSK3.value_z, y=pS6.value_z, color=pAkt.value_z),shape=19,size=5) +
      scale_color_gradient(low="white", high="#c52070",limits=c(akt_min-0.5,akt_max)) +
      xlim(gsk_min-0.1,gsk_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
      theme_bw()
pp2

pp4 <- ggplot() +
      geom_point(data=fepdata, aes(x=pGSK3.value_z, y=pS6.value_z, size=pAkt.value_z, color=pheno_lm),shape=20) +
      scale_color_manual(values = c("#DC267F","#648FFF")) + 
      xlim(gsk_min-0.1,gsk_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
      theme_bw()
pp4

#png("FEPvsControls_v5_all_p_gsk3size.png", units="in", width=8, height=5, res=1800)
pp3 + theme(axis.text = element_text(size = 14), legend.text=element_text(size=14), legend.title=element_text(size=15),
            axis.title=element_text(size=15)) + labs(size="pGSK3 Z-value", colour="Phenotype") + scale_size(range = c(3,9)) 

#dev.off()

#png("FEPvsControls_v5_all_p_aktsize.png", units="in", width=8, height=5, res=1800)
pp4 + theme(axis.text = element_text(size = 14), legend.text=element_text(size=14), legend.title=element_text(size=15),
              axis.title=element_text(size=15)) + labs(size="pAkt Z-value", colour="Phenotype") + scale_size(range = c(3,10))

#dev.off()

#png("FEPvsControls_v5_all_p_gsk3size2.png", units="in", width=8, height=5, res=1800)
pp3 + theme(axis.text = element_text(size = 14), legend.text=element_text(size=14), legend.title=element_text(size=15),
            axis.title=element_text(size=15)) + labs(size="pGSK3 Z-value", colour="Phenotype") + scale_size(range = c(2,8)) 

#dev.off()

#png("FEPvsControls_v5_all_p_aktsize2.png", units="in", width=8, height=5, res=1800)
pp4 + theme(axis.text = element_text(size = 14), legend.text=element_text(size=14), legend.title=element_text(size=15),
            axis.title=element_text(size=15)) + labs(size="pAkt Z-value", colour="Phenotype") + scale_size(range = c(2,9)) 

#dev.off()


#png("FEPvsControls_v5_all_p_v2.png", units="in", width=8, height=5, res=1800)
#pp2 + theme(axis.text = element_text(size = 14), legend.text=element_text(size=10), legend.title=element_text(size=15),
#            axis.title=element_text(size=15)) 
#dev.off()

3D

#3D
plot3d(
  x=fepdata$pAkt.value_z, y=fepdata$pS6.value_z, z=fepdata$pGSK3.value_z,
  col = fepdata$colour,
  type = 's',
  radius = 0.05,
  xlab="pAkt", ylab="pS6", zlab="GSK3")
#writeWebGL(filename="3dscatter_all.html" , width=600, height=600)

Open The attached 3dscatter_all.html to view the 3D plot.

Controls Only

fepdata_temp = (subset(fepdata,Phenotype==0))

sp3b<-ggplot(fepdata_temp, aes(x=pAkt.value_z, y=pS6.value_z, color=pGSK3.value_z)) + geom_point(size=4, shape=19) +
            ggtitle("Controls") + 
            scale_color_gradient(low="white", high="#0000cc",limits=c(gsk_min-0.5,gsk_max)) +
            xlim(akt_min-0.1,akt_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
            theme_bw()

sp3b

sp3b2<-ggplot(fepdata_temp, aes(x=pGSK3.value_z, y=pS6.value_z, color=pAkt.value_z)) + geom_point(size=4, shape=19) +
            ggtitle("Controls") + 
            scale_color_gradient(low="white", high="#0000cc",limits=c(akt_min-0.5,akt_max)) +
            xlim(gsk_min-0.1,gsk_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
            theme_bw()

sp3b2

par(mar=c(0,0,0,0))
plot3d(
  x=fepdata_temp$pAkt.value_z, y=fepdata_temp$pS6.value_z, z=fepdata_temp$pGSK3.value_z,
  col = fepdata_temp$colour,
  type = 's',
  radius = 0.05,
  xlab="pAkt", ylab="pS6", zlab="GSK3")

#writeWebGL( filename="3dscatter_controls.html" ,  width=600, height=600)

Open The attached 3dscatter_controls.html to view the 3D plot

Cases Only

fepdata_temp = (subset(fepdata,Phenotype==1))

sp4a<-ggplot(fepdata_temp, aes(x=pAkt.value_z, y=pS6.value_z, color=pGSK3.value_z)) + geom_point(size=4, shape=19) +
            ggtitle("Cases") + 
            scale_color_gradient(low="white", high="#cc0000",limits=c(gsk_min-0.5,gsk_max)) +
            xlim(akt_min-0.1,akt_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
            theme_bw()
sp4a

sp4a2<-ggplot(fepdata_temp, aes(x=pGSK3.value_z, y=pS6.value_z, color=pAkt.value_z)) + geom_point(size=4, shape=19) +
            ggtitle("Cases") + 
            scale_color_gradient(low="white", high="#cc0000",limits=c(akt_min-0.5,akt_max)) +
            xlim(gsk_min-0.1,gsk_max+0.1) + ylim(ps6_min-0.1,ps6_max+0.1) +
            theme_bw()
sp4a2

par(mar=c(0,0,0,0))
plot3d(
  x=fepdata_temp$pAkt.value, y=fepdata_temp$pS6.value, z=fepdata_temp$pGSK3.value,
  col = fepdata_temp$colour,
  type = 's',
  radius = 0.05,
  xlab="pAkt", ylab="pS6", zlab="GSK3")

writeWebGL( filename="3dscatter_cases.html" ,  width=600, height=600)

Open The attached 3dscatter_cases.html to view the 3D plot

Controls vs Cases but different graphs

grid.arrange(sp3b,sp4a,ncol=2)

Akt vs GSK3 correlation in Cases vs Controls

print_p_cor <- function(x,y,digits=4,prefix=""){
 cortest <- cor.test(x, y, use="complete.obs", method = "spearman", exact = F)
 r = abs(cortest$estimate)
 p = cortest$p.value
 txt1 <- format(c(r, 0.123456789), digits = digits)[1]
 txt2 <- format(c(p, 0.123456789), digits = digits)[1]
 txt <- paste0(prefix, "r=",txt1, ", ", "p=", txt2)
 return(txt)
}

fepdata_cont = (subset(fepdata,Phenotype==0))
fepdata_case = (subset(fepdata,Phenotype==1))

cont_plot<-ggplot(fepdata_cont, aes(x=pAkt.value_z, y=pGSK3.value_z)) + geom_point(size=2, colour="#253582ff") +
           annotate("text", x=2.4, y=-2, size=3, label=paste0(print_p_cor(fepdata_cont$pAkt.value,fepdata_cont$pGSK3.value)," ns")) + 
           xlab("pAkt") + ylab("pGSK3") + ggtitle("Controls") + stat_smooth(method=lm, colour="#253582ff") + theme_classic()
case_plot<-ggplot(fepdata_case, aes(x=pAkt.value_z, y=pGSK3.value_z)) + geom_point(size=2, colour="#de7065ff") +
           annotate("text", x=1.2, y=-2, size=3, label=paste0(print_p_cor(fepdata_case$pAkt.value,fepdata_case$pGSK3.value)," ns")) + 
           xlab("pAkt") + ylab("pGSK3") + ggtitle("Cases") + stat_smooth(method=lm, colour="#de7065ff") + theme_classic()

grid.arrange(cont_plot, case_plot, ncol=2)

Session Info for reproducibility

## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] gridExtra_2.3     viridis_0.6.1     viridisLite_0.4.0 broom_0.7.7      
##  [5] ggpubr_0.4.0      ggnewscale_0.4.5  ggplot2_3.3.4     car_3.0-10       
##  [9] carData_3.0-4     readr_1.4.0       dplyr_1.0.7       tidyr_1.1.3      
## [13] rglwidget_0.2.1   rgl_0.106.8       knitr_1.33       
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.0              jsonlite_1.7.2          splines_4.1.0          
##  [4] bslib_0.2.5.1           shiny_1.6.0             highr_0.9              
##  [7] cellranger_1.1.0        yaml_2.2.1              lattice_0.20-44        
## [10] pillar_1.6.1            backports_1.2.1         glue_1.4.2             
## [13] digest_0.6.27           manipulateWidget_0.11.0 promises_1.2.0.1       
## [16] ggsignif_0.6.2          colorspace_2.0-1        Matrix_1.3-3           
## [19] htmltools_0.5.1.1       httpuv_1.6.1            pkgconfig_2.0.3        
## [22] haven_2.4.1             purrr_0.3.4             xtable_1.8-4           
## [25] scales_1.1.1            openxlsx_4.2.4          later_1.2.0            
## [28] rio_0.5.27              tibble_3.1.2            mgcv_1.8-35            
## [31] generics_0.1.0          farver_2.1.0            ellipsis_0.3.2         
## [34] withr_2.4.2             magrittr_2.0.1          crayon_1.4.1           
## [37] readxl_1.3.1            mime_0.11               evaluate_0.14          
## [40] fansi_0.5.0             nlme_3.1-152            rstatix_0.7.0          
## [43] forcats_0.5.1           foreign_0.8-81          tools_4.1.0            
## [46] data.table_1.14.0       hms_1.1.0               lifecycle_1.0.0        
## [49] stringr_1.4.0           munsell_0.5.0           zip_2.2.0              
## [52] compiler_4.1.0          jquerylib_0.1.4         rlang_0.4.11           
## [55] grid_4.1.0              htmlwidgets_1.5.3       crosstalk_1.1.1        
## [58] miniUI_0.1.1.1          labeling_0.4.2          rmarkdown_2.9          
## [61] gtable_0.3.0            abind_1.4-5             curl_4.3.2             
## [64] R6_2.5.0                fastmap_1.1.0           utf8_1.2.1             
## [67] stringi_1.6.2           Rcpp_1.0.6              vctrs_0.3.8            
## [70] tidyselect_1.1.1        xfun_0.24